#' Attach a package, installing it from CRAN first when it is missing.
#'
#' @param x Package name, given either as a bare symbol
#'   (`packages(dplyr)`) or as a string (`packages("dplyr")`).
#' @return Invisibly `TRUE` once the package is attached. An error is
#'   raised when `x` is not a single name, or when the package still cannot
#'   be loaded after the install attempt.
packages <- function(x) {
  # Capture the argument unevaluated so bare package names are accepted.
  pkg <- as.character(substitute(x))
  if (length(pkg) != 1L || !nzchar(pkg)) {
    stop("`x` must be a single package name", call. = FALSE)
  }
  # Only hit the network when the package is not installed locally.
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkgs = pkg, repos = "https://cran.r-project.org")
  }
  # library() errors (instead of returning FALSE) when loading fails, so a
  # failed install is reported here rather than at first use of the package.
  library(pkg, character.only = TRUE)
  invisible(TRUE)
}
packages(ggplot2)
packages(googleVis)
packages(reshape2)
packages(plotly)
packages(RColorBrewer)
packages(readr)
packages(rpart)
packages(dplyr)
packages(rpart.plot)
# Set the googleVis plot tag option to "chart". `op` keeps the previous
# option values so they can be restored later with options(op).
op <- options(gvis.plot.tag = "chart")
# NOTE(review): hard-coded working directory; the relative CSV/HTML paths
# used by this script are resolved against it.
setwd("~/LDCDemo")
# Load the loan data set used by the charts that follow.
loan <- read_csv("loan_df_reclean_new.csv")
# Preview the first rows of the loan data.
head(loan)

# Frequency of each loan status as a two-column data frame.
plot_status <- as.data.frame(table(loan$loan_status))
colnames(plot_status) <- c("Loan Status", "Freq")

# Column chart and pie chart of the same frequencies.
status_bar <- gvisColumnChart(
  plot_status,
  options = list(legend = "none", width = 400, height = 200)
)
status_pie <- gvisPieChart(
  plot_status,
  options = list(width = 400, height = 200)
)

# Place both charts side by side and write the merged chart HTML to disk.
merged_plot <- gvisMerge(
  status_bar, status_pie,
  tableOptions = "cellspacing=\"20\" bgcolor=\"#AABBCC\"",
  horizontal = TRUE
)
cat(merged_plot$html$chart, file = "merged_plot.html")
# Print the loan status frequency table.
plot_status

# Total loan amount per value of `label`.
total_loss <- loan %>%
  select(loan_amnt, label) %>%
  group_by(label) %>%
  summarise(total_loan_amnt = sum(loan_amnt))
# NOTE(review): assumes exactly two groups, with the first row being the
# "Bad" label and the second the "Good" one -- confirm against the data.
total_loss$Loan_Status <- c("Bad", "Good")
# Per-bar colours, passed to the chart as a style column.
total_loss$count.style <- c("red", "darkblue")
plot_loss <- gvisBarChart(
  total_loss,
  xvar = "Loan_Status",
  yvar = c("total_loan_amnt", "count.style"),
  options = list(
    title = "Total Loss Due to Bad Loan",
    height = 400,
    width = 800,
    legend = "none"
  )
)
# Render the total-loss bar chart.
plot(plot_loss)

# Box plot of interest rate, one box per grade.
p_grade_interest <- plot_ly(loan, y = ~int_rate, color = ~grade, type = "box")
p_grade_interest

# Number of loans for every (grade, label) combination.
plot_grade <- loan %>%
  select(grade, label) %>%
  group_by(grade, label) %>%
  summarise(total = n())
# Wide layout: one row per grade, one column per label value. Naming
# value.var explicitly avoids relying on dcast() guessing the value column.
reshaped <- dcast(plot_grade, grade ~ label, value.var = "total")
# The label values 0 and 1 become the column names "0" and "1".
SteppedArea <- gvisSteppedAreaChart(
  reshaped,
  xvar = "grade",
  yvar = c("0", "1"),
  options = list(isStacked = "percent")
)
# Render the grade vs. label stepped-area chart.
plot(SteppedArea)

# Total loan amount per state (missing amounts are ignored).
value_by_state <- loan %>%
  group_by(addr_state) %>%
  summarise(value = sum(loan_amnt, na.rm = TRUE))

# Ten states with the largest totals, shown as a table.
tbl <- head(value_by_state[order(-value_by_state$value), ], 10)
colnames(tbl)[2] <- "Total Loan Amt"
Tbl <- gvisTable(tbl, options = list(height = 300, width = 200))

# US map coloured by total loan amount per state.
GeoStates <- gvisGeoChart(
  value_by_state, "addr_state", "value",
  options = list(
    region = "US",
    displayMode = "regions",
    resolution = "provinces",
    width = 600,
    height = 400
  )
)
plot(gvisMerge(GeoStates, Tbl, horizontal = TRUE))
# Loan count and mean loan amount for each loan purpose.
plot_purpose <- loan %>%
  select(loan_amnt, label, purpose) %>%
  group_by(purpose) %>%
  summarise(total = n(), avg_loan = mean(loan_amnt))

# Bubble chart: x = number of loans, y = mean loan amount, one bubble per
# purpose, bubble size driven by the loan count.
Bubble <- gvisBubbleChart(
  plot_purpose,
  idvar = "purpose",
  xvar = "total",
  yvar = "avg_loan",
  colorvar = "purpose",
  sizevar = "total",
  options = list(
    title = "Major Loan Purpose",
    hAxis = "{ title: 'Count',
titleTextStyle: {color: 'black'},
viewWindowMode:'explicit',
viewWindow: {max: 225000}}",
    vAxis = "{ title: 'Average Loan Amount',
titleTextStyle: {
color: 'black'}}",
    legend = "none",
    colorAxis = "{colors: ['red', 'blue']}",
    height = 400,
    width = 800
  )
)

# Write the chart HTML to disk.
cat(Bubble[["html"]][["chart"]], file = "Bubble.html")
# Per-purpose loan count and mean of `label` (the share of loans with
# label 1, used here as the full-payment rate).
df_rate <- loan %>%
  select(label, purpose) %>%
  group_by(purpose) %>%
  summarise(total_n = n(), full_payment_rate = mean(label))
df_rate$default_rate <- 1 - round(df_rate$full_payment_rate, 2)
# Sort ascending by default rate so the colour ramp below runs from the
# lowest to the highest rate.
df_rate <- df_rate[order(df_rate$default_rate), ]

# Control bar colour: one colour per row, from dark green to red. The count
# follows the data instead of a fixed 14 so the assignment cannot fail when
# the number of purposes differs.
colfunc <- colorRampPalette(c("darkgreen", "red"))
df_rate$count.style <- colfunc(nrow(df_rate))
Bar <- gvisBarChart(
  df_rate,
  xvar = "purpose",
  yvar = c("default_rate", "count.style"),
  options = list(
    title = "Default Rate by Loan Purpose",
    height = 400,
    width = 800,
    legend = "none"
  )
)
# plot(Bar)
# cat(Bar$html$chart, file = 'Bar.html')

# Stack the bubble chart above the bar chart.
plot(gvisMerge(Bubble, Bar, horizontal = FALSE))
# Readable outcome column: label 1 maps to "Good", any other value to "Bad".
loan[["label_fct"]] <- ifelse(loan[["label"]] == 1, "Good", "Bad")

# Keep only loans with a debt-to-income ratio below 100.
loan_sub <- loan %>% filter(dti < 100)

# Box plot of DTI per outcome, converted to a plotly object.
dti_box <- ggplot(loan_sub, aes(x = label_fct, y = dti, fill = label_fct)) +
  geom_boxplot(outlier.shape = NA) +
  ggtitle("Bad Loans Have Higher Debt-to-Income Ratio")
dti_box <- plotly_build(dti_box)

# Replace the marker settings of every trace with fully transparent markers
# (presumably to hide the outlier points in the plotly rendering).
dti_box$x$data <- lapply(dti_box$x$data, function(trace) {
  trace$marker <- list(opacity = 0)
  trace
})
# Render the DTI box plot.
dti_box

# Number of loans for every (home_ownership, label_fct) combination.
plot_home <- loan %>%
  select(home_ownership, label_fct, loan_amnt) %>%
  group_by(home_ownership, label_fct) %>%
  summarise(countn = n())
# Wide layout: one row per outcome, one column per ownership category.
# Naming value.var explicitly avoids relying on dcast() guessing it.
reshaped_home <- dcast(
  plot_home,
  label_fct ~ home_ownership,
  value.var = "countn"
)
# NOTE(review): columns 2:5 assume at least four ownership categories and
# silently drop any beyond the fourth -- confirm this is intended.
SteppedArea_home <- gvisSteppedAreaChart(
  reshaped_home,
  xvar = "label_fct",
  yvar = names(reshaped_home)[2:5],
  options = list(
    isStacked = "percent",
    title = "Home Ownership vs. Loan Status"
  )
)
# Render the home ownership vs. loan status chart.
plot(SteppedArea_home)

# Replace `loan` with the data set used for the tree model.
loan <- read_csv("testData_prob.csv")
# NOTE(review): loan_sample is never used below -- the tree is fitted on the
# full `loan` table. Confirm whether `data = loan_sample` was intended.
loan_sample <- sample_n(loan, 10000)

# Classification tree predicting `label` from home ownership, grade and DTI.
fit <- rpart(
  label ~ home_ownership + grade + dti,
  data = loan,
  method = "class",
  control = rpart.control(minsplit = 100, minbucket = 4, cp = .001)
)
rpart.plot(fit, tweak = 1)